/*
Value TB

Do File for generating summary graphs and tables

Created by: Sedona Sweeney, London School of Hygiene and Tropical Medicine
Last edited: 6 December 2019

*/

clear all
set more off

cd "C:/Users/Sedona/Dropbox/lshtm/Value TB/Value TB/Data/3rd Review - Sedona & Anna/" /// <-- change this directory when working from a different computer

local c_date = c(current_date)
local date_string = subinstr("`c_date'", " " , "", .)
capture mkdir "Analysis/`date_string'"
capture mkdir "Analysis/`date_string'/facilityfiles"


cd "Analysis/`date_string'"
capture mkdir "Graphs"

/*
******************************************************** 
**** Lists of outliers by country and facility level ****
********************************************************



 capture putexcel clear
			 
use "pooled dataset_outputs.dta", clear

local countrycode = countrycode

 encode met_TDvBU, gen(tdbu)
	label var tdbu " "

 keep if totalcost > 0 & servicestatistics > 0 & !missing(unitcost_USD) & aggregate == 0 & overheads == 0 

gen fc_level = 1 if fc_type == "Community Health Unit" | fc_type == "Community health unit" | fc_type == "Health post/dispensary"
replace fc_level = 2 if fc_type == "Health centre"
replace fc_level = 3 if fc_type == "Primary (sub-county/district) hospital"
replace fc_level = 4 if fc_type == "Secondary (county/general) hospital"
replace fc_level = 5 if fc_type == "Reference laboratory"

by output_t fc_country met_TDvBU, sort : egen float mean_country = mean(unitcost_USD)
by output_t fc_country met_TDvBU, sort : egen float sd_country = sd(unitcost_USD)
gen outlier_country = unitcost_USD > (mean_country + (2 * sd_country)) |  unitcost_USD < (mean_country - (2 * sd_country))
replace outlier_country = 0 if sd_country == . | sd_country == 0
gen outlier = outlier_country == 1
replace outlier = 1 if unitcost_USD > 100

by output_t fc_country met_TDvBU, sort : egen float mean_qstaffclin = mean(q_stafftime_clin)
by output_t fc_country met_TDvBU, sort : egen float sd_qstaffclin = sd(q_stafftime_clin)
gen outlier_ct = q_stafftime_clin  > (mean_qstaffclin + (2 * sd_qstaffclin)) |  q_stafftime_clin < (mean_qstaffclin - (2 * sd_qstaffclin))
replace outlier_ct = 0 if q_stafftime_clin == mean_qstaffclin | mean_qstaffclin == 0 | sd_qstaffclin == 0

by output_t fc_country met_TDvBU, sort : egen float mean_qstaffsupport = mean(q_stafftime_support)
by output_t fc_country met_TDvBU, sort : egen float sd_qstaffsupport = sd(q_stafftime_support)
gen outlier_st = q_stafftime_support  > (mean_qstaffsupport + (2 * sd_qstaffsupport)) |  mean_qstaffsupport < (mean_qstaffsupport - (2 * sd_qstaffsupport))
replace outlier_st = 0 if q_stafftime_support == mean_qstaffsupport | mean_qstaffsupport == 0 | sd_qstaffsupport == 0 | sd_country == .

by output_t fc_country met_TDvBU, sort : egen float mean_qstaffvol = mean(q_stafftime_vol)

assertlist outlier_country == 0, excel(analysis_`date_string'.xlsx) sheet(Outliers_outputs) list(fc_country fc_name fc_code met_TDvBU output2 mean_country unitcost_USD) tag(> 2xSD mean by country)
assertlist unitcost_USD < 100, excel(analysis_`date_string'.xlsx) sheet(Outliers_outputs) list(fc_country fc_name fc_code met_TDvBU output2 mean_country unitcost_USD) tag(cost > USD100)
assertlist outlier_ct == 0, excel(analysis_`date_string'.xlsx) sheet(Outliers_ClinStaffTime) list(fc_name fc_code met_TDvBU output2 q_stafftime_clin mean_qstaffclin) tag(> 2xSD mean by country)
assertlist outlier_st == 0, excel(analysis_`date_string'.xlsx) sheet(Outliers_SuppStaffTime) list(fc_name fc_code met_TDvBU output2 q_stafftime_support mean_qstaffsupport) tag(> 2xSD mean by country)


******************************************************************************************* 
**** Graphs with outliers by output_t highlighted outliers by country and facility level ****
*******************************************************************************************

capture putexcel clear
local gphrow_o = 1
levelsof opgrp2, local(opgrp2)
   foreach op2 of local opgrp2 {

stripplot unitcost_USD if opgrp2 == "`op2'" , jitter(2) bar over(tdbu) iqr ///
sep(outlier) mlabel(fc_name, size(tiny)) mlabel(. fc_code) mcolor(green red) mlabcolor(green red)  ///
by(output_t, compact col(1) note("") legend(off) )  ysc(reverse) subtitle(, pos(9) ring(1) nobexpand bcolor(none) ///
placement(e)) ytitle("") xtitle(Unit cost `op2') name(oc_`op2', replace)

		graph export "Graphs/oc_`op2'.png", as(png) replace
		putexcel set analysis_`date_string'.xlsx, sheet("Outliers_outputs") modify
		putexcel S`gphrow_o' = picture("Graphs/oc_`op2'.png") 

	
local gphrow_o = `gphrow_o' + 40
}


capture putexcel clear
local gphrow_o = 1
levelsof opgrp2, local(opgrp2)
   foreach op2 of local opgrp2 {

stripplot q_stafftime_clin if opgrp2 == "`op2'" , jitter(2) bar over(tdbu) iqr ///
sep(outlier_ct) mlabel(fc_name, size(tiny)) mlabel(. fc_code) mcolor(green red) mlabcolor(green red)  ///
by(output_t, compact col(1) note("") legend(off) )  ysc(reverse) subtitle(, pos(9) ring(1) nobexpand bcolor(none) ///
placement(e)) ytitle("") xtitle(Clinical Staff Time `op2') name(ct_`op2', replace)

		graph export "Graphs/ct_`op2'.png", as(png) replace
		putexcel set analysis_`date_string'.xlsx, sheet("Outliers_ClinStaffTime") modify
		putexcel S`gphrow_o' = picture("Graphs/ct_`op2'.png") 

	
local gphrow_o = `gphrow_o' + 40
}


capture putexcel clear
local gphrow_o = 1
levelsof opgrp2, local(opgrp2)
   foreach op2 of local opgrp2 {

stripplot q_stafftime_support if opgrp2 == "`op2'" , jitter(2) bar over(tdbu) iqr ///
sep(outlier_st) mlabel(fc_name, size(tiny)) mlabel(. fc_code) mcolor(green red) mlabcolor(green red)  ///
by(output_t, compact col(1) note("") legend(off) )  ysc(reverse) subtitle(, pos(9) ring(1) nobexpand bcolor(none) ///
placement(e)) ytitle("") xtitle(Support Staff Time `op2') name(st_`op2', replace)

		graph export "Graphs/st_`op2'.png", as(png) replace
		putexcel set analysis_`date_string'.xlsx, sheet("Outliers_SuppStaffTime") modify
		putexcel S`gphrow_o' = picture("Graphs/st_`op2'.png") 

	
local gphrow_o = `gphrow_o' + 40
}

*/

******************************************************************************************* 
**** Patient costs - graphs with outliers by output_t highlighted outliers by country****
*******************************************************************************************


 
use "pooled dataset_patient.dta", clear

replace int_pop2 = int_pop1 if int_type == "Active Case Finding"
replace int_pop1 = "ACF" if int_type == "Active Case Finding"

egen outpatientinpatient = ends(int_phase), last punct(" - ")
replace outpatientinpatient = "" if outpatientinpatient == "Intensive" | outpatientinpatient == "Continuation"

strip int_type, of(],/:[ -) gen (inttype2)
replace inttype2 = subinstr(inttype2,"IntensifiedCaseFinding", "ICF",1)   

strip int_pop1, of(],/:[ -) gen (intpop1)
replace intpop1 = subinstr(intpop1,"householdcontactofDSTB","",1)


egen intervention_long = concat(int_pop1 int_pop2 int_pop3 int_phase int_regimen), punct(-)
strip intervention_long, of(],/:[ ) gen(itemp)
replace itemp = subinstr(itemp,"outpatient", "OP",1)   
replace itemp = subinstr(itemp,"inpatient", "IP",1)   
replace itemp = subinstr(itemp,"--", "-",.)   
encode itemp, gen(intpop_t)

 encode met_TDvBU, gen(tdbu)
	label var tdbu " "

by inttype2 intpop_t fc_country met_TDvBU, sort : egen float mean_country = mean(tcost_USD)
by inttype2 intpop_t fc_country met_TDvBU, sort : egen float sd_country = mean(tcost_USD)
gen outlier_country = tcost_USD > (mean_country + (2 * sd_country)) |  tcost_USD < (mean_country - (2 * sd_country))
replace outlier_country = 0 if tcost_USD == mean_country | sd_country == . | sd_country == 0

gen outlier_toohigh = int_type == "1st Line TB Treatment" & tcost_USD > 1000 | int_type == "2nd Line TB Treatment" & tcost_USD > 10000

gen outlier = outlier_toohigh == 1 | outlier_country == 1
 
assertlist outlier_country == 0, excel(analysis_`date_string'.xlsx) sheet(Outliers_patients) list(fc_country fc_name fc_code met_TDvBU tcost_USD mean_country inttype2 intervention_long) tag(> 2xSD mean by country)
assertlist outlier_toohigh == 0, excel(analysis_`date_string'.xlsx) sheet(Outliers_patients) list(fc_country fc_name fc_code met_TDvBU tcost_USD mean_country inttype2 intervention_long) tag(cost may be too high)



capture putexcel clear
local gphrow_o = 1

levelsof inttype2, local(int)
   foreach i of local int {

   levelsof intpop1 if inttype2 == "`i'", local(pop1)
   foreach ip1 of local pop1 {
   
	
		stripplot tcost_USD if inttype2 == "`i'" & intpop1 == "`ip1'", jitter(2) bar over(tdbu) iqr sep(outlier) ///
		mlabel(fc_name, size(tiny)) mlabel(. fc_code) mcolor(green red) mlabcolor(green red) ///
		by(intpop_t, compact col(1) note("") legend(off) )  ysc(reverse) subtitle(, pos(9) ring(1) nobexpand bcolor(none)placement(e)) ///
		 ytitle("") xtitle(Total cost `i') name(oc_`i', replace)

		graph export "Graphs/oc_`i'.png", as(png) replace
		putexcel set analysis_`date_string'.xlsx, sheet("Outliers_patients") modify
		putexcel S`gphrow_o' = picture("Graphs/oc_`i'.png") 

	
local gphrow_o = `gphrow_o' + 40
}
}



*/

